# -*- coding: utf-8 -*-

import scrapy

from tutorial.items import TutorialItem


class HupuSpider(scrapy.spiders.Spider):
    """Spider that walks the gank.io history index and collects image URLs.

    ``parse`` follows the first direct link of every ``div.row`` element on
    the start page; ``parse_detail`` yields one ``TutorialItem`` per followed
    page with its ``image_urls`` field populated.
    """

    name = "gank"
    allowed_domains = ["gank.io"]
    start_urls = [
        "http://gank.io/history"
    ]

    def parse(self, response):
        """Yield a follow-up request for each linked row of the index page.

        Args:
            response: Response for one of ``start_urls``.

        Yields:
            scrapy.Request: One request per ``div.row`` that contains a
            direct ``<a href>`` child, with ``parse_detail`` as callback.
        """
        for row in response.xpath('//div[@class="row"]'):
            # extract_first() returns None instead of raising IndexError, so
            # a row without a link is skipped rather than aborting the crawl
            # of every remaining row.
            href = row.xpath('a/@href').extract_first()
            if not href:
                continue
            # urljoin resolves the href against the page URL, which also
            # copes with hrefs that are absolute or lack a leading slash.
            yield scrapy.Request(response.urljoin(href), callback=self.parse_detail)

    @staticmethod
    def parse_detail(response):
        """Yield an item carrying the image URLs found on a detail page.

        Images are looked up under ``div.outlink`` inside ``<p>``, ``<h1>``,
        ``<h2>`` and ``<h3>`` in that order; the first location that yields
        any ``src`` values wins. If none match, ``image_urls`` is an empty
        list.

        Args:
            response: Response for a detail page requested by ``parse``.

        Yields:
            TutorialItem: A single item with ``image_urls`` set to a list of
            ``src`` attribute strings.
        """
        print(response.url)
        item = TutorialItem()
        candidate_xpaths = (
            '//div[@class="outlink"]/p/img/@src',
            '//div[@class="outlink"]/h1/img/@src',
            '//div[@class="outlink"]/h2/img/@src',
            '//div[@class="outlink"]/h3/img/@src',
        )
        image_urls = []
        for xpath in candidate_xpaths:
            # .extract() returns an empty list (it never raises IndexError)
            # when nothing matches, so fall through on emptiness instead of
            # relying on an exception.
            image_urls = response.xpath(xpath).extract()
            if image_urls:
                break
        item['image_urls'] = image_urls
        yield item
